/*******************************************************************************
 * Package: com.song.service
 * Type:    WordCountService
 * Date:    2024-11-04 16:07
 *
 * Copyright (c) 2024 LTD All Rights Reserved.
 *
 * You may not use this file except in compliance with the License.
 *******************************************************************************/
package com.song.service;

import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.springframework.stereotype.Service;
import scala.Tuple2;

import java.util.Arrays;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Spark-backed word-count service.
 *
 * <p>Reads the text file at {@link #FILE_PATH} through the injected
 * {@link JavaSparkContext}, splits every line into words and reports how
 * many times each word occurs.
 *
 * @author Songxianyang
 * @date 2024-11-04 16:07
 */
@Service
@RequiredArgsConstructor
@Slf4j
public class WordCountService {

    /** Path of the text file whose words are counted. */
    public static final String FILE_PATH="yqmm-spark/file/file.txt";

    /**
     * Separator between words: one or more non-word characters.
     *
     * <p>Compiled once instead of on every input line. Note that {@code \W}
     * is ASCII-only unless {@link Pattern#UNICODE_CHARACTER_CLASS} is set, so
     * non-ASCII letters are treated as separators.
     */
    private static final Pattern WORD_SEPARATOR = Pattern.compile("\\W+");

    /** Spark context used to build the RDDs; supplied through the generated constructor. */
    private final JavaSparkContext javaSparkContext;

    /**
     * Counts the occurrences of every word in the file at {@link #FILE_PATH}.
     *
     * @return a map from each distinct word (a {@code String} at runtime) to
     *         the number of times it appears in the file
     */
    public Map<Object, Long> countWords() {
        // Load the file as an RDD with one element per line.
        JavaRDD<String> lines = javaSparkContext.textFile(FILE_PATH);

        // Break each line into tokens and drop the empty token that split()
        // produces when a line starts with a separator.
        JavaRDD<String> words = lines
                .flatMap(line -> Arrays.asList(WORD_SEPARATOR.split(line)).iterator())
                .filter(word -> !word.isEmpty());

        // Pair every word with 1. The key type stays Object so that
        // countByKey() yields the Map<Object, Long> this method has always returned.
        JavaPairRDD<Object, Integer> wordPairs = words.mapToPair(word -> new Tuple2<>(word, 1));

        // Count the elements per key and bring the result back as a local map.
        return wordPairs.countByKey();
    }
}
